ggplot(corn %>%
filter(date > '2007-01-01' & date < '2016-01-01'),
aes(x = date, y = value)) +
geom_line() + ggtitle('Historical Corn Prices - USD ($) Per Bushel')
ggplot(rain %>%
filter(date > '2007-01-01'),
aes(x = date, y = pr, col=Country)) + geom_line()
c = corn %>%
tbl_time(date) %>%
as_period('monthly', side='start') %>%
mutate(date = lubridate::floor_date(date,'month'))
r = rain %>%
select(date, Country, pr) %>%
spread(Country, pr)
df = c %>%
left_join(r, by = 'date') %>%
rename(corn = value) %>%
drop_na() #%>% gather(nam,val,-date)
p = ggplot(df)
p + geom_point(aes(x = corn, y = AUS))
p + geom_point(aes(x = corn, y = BRA))
p + geom_point(aes(x = corn, y = USA))
plot(df %>% select(-date))
“A time series is stationary if a shift in time doesn’t cause a change in the shape of the distribution. Basic properties of the distribution like the mean, variance, and covariance are constant over time.”
Stationary vs Seasonal Time series - http://www.statisticshowto.com/stationarity/
Most statistical models that have been developed assume stationarity.
# Example from: https://www.analyticsvidhya.com/blog/2015/12/complete-tutorial-time-series-modeling/
AirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
plot(AirPassengers)
abline(reg = lm(AirPassengers ~ time(AirPassengers)))
#convert to df for rolling operations
ap = matrix(AirPassengers, nco = 1)
t1 = as.integer(min(time(AirPassengers)))
t2 = as.integer(max(time(AirPassengers)))
y = sort(rep(seq(from = t1, to = t2, by = 1), t2-t1 + 1))
m = rep(seq(from = 1, to = 12, by = 1), t2-t1 + 1)
apdf = tibble(year = y, month = m, passengers = ap[,1])
apdf = apdf %>% mutate(date = as.Date(paste0(year,'-',month,'-01'))) %>%
select(date,passengers)
#remove last 3 years of data
apdf_3 = apdf %>% mutate(x = 1:nrow(apdf)) %>% filter(date < '1958-01-01')
ts_fit = lm(passengers ~ x, data = apdf_3)
ts_fit
##
## Call:
## lm(formula = passengers ~ x, data = apdf_3)
##
## Coefficients:
## (Intercept) x
## 95.042 2.493
intercept = ts_fit$coefficients[1]
slope = ts_fit$coefficients[2]
apdf_prediction = apdf_3 %>% mutate(type = 'historical') %>%
bind_rows(apdf %>% filter(date >= '1958-01-01') %>% mutate(type='actual')) %>%
mutate(x = 1:nrow(apdf)) %>%
mutate(prediction = (x*slope) + intercept)
ggplot(apdf_prediction, aes(x=date, y=passengers, col=type)) +
geom_line() +
geom_line(aes(y=prediction), col='black')
rolling_mean = rollify(mean, window = 8)
tmp = apdf %>% mutate(ma = rolling_mean(passengers)) %>% drop_na()
ggplot(tmp, aes(x=date)) + geom_line(aes(y=passengers)) + geom_line(aes(y=ma), col = 'red')
plot(log(AirPassengers))
plot(diff(AirPassengers))
plot(diff(log(AirPassengers)))
adf.test(diff(log(AirPassengers)))
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(AirPassengers))
## Dickey-Fuller = -6.4313, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
acf(log(AirPassengers))
acf(diff(log(AirPassengers)))
pacf(diff(log(AirPassengers)))
Non-Seasonal ARIMA Model is built:
fit <- arima(log(AirPassengers),
order = c(0, 1, 1))
pred <- predict(fit, n.ahead = 10*12)
ts.plot(AirPassengers,2.718^pred$pred, log = "y", lty = c(1,3))
fit <- arima(log(AirPassengers),
order = c(0, 1, 1),
seasonal = list(order = c(0, 1, 1),
period = 12))
pred <- predict(fit, n.ahead = 10*12)
ts.plot(AirPassengers,2.718^pred$pred, log = "y", lty = c(1,3))
dow = tq_index("DOW") %>%
tq_get(get = "stock.prices") %>%
drop_na()
head(dow, 10)
p = ggplot(dow, aes(x = date, y = close, col = symbol)) +
geom_line() +
gghighlight(max(close) > 250) +
theme_minimal() +
facet_wrap(~ symbol)
print(p)
ba = dow %>%
filter(symbol == 'BA') %>%
select(date, close)
plotly::ggplotly(ggplot(ba, aes(x = date, y = close)) + geom_line() + geom_smooth(method = lm, se = FALSE))
adf.test(zoo::zoo(ba$close, ba$date))
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ba$close, ba$date)
## Dickey-Fuller = 0.43376, Lag order = 13, p-value = 0.99
## alternative hypothesis: stationary
ts_fit = lm(close ~ date, data = ba)
ts_fit
##
## Call:
## lm(formula = close ~ date, data = ba)
##
## Coefficients:
## (Intercept) date
## -737.12864 0.05401
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
ret = ba %>%
tq_transmute(close, mutate_fun = dailyReturn)
ggplot(ret, aes(x = date, y = daily.returns)) + geom_line() + geom_smooth(method = lm, se = FALSE)
adf.test(zoo::zoo(ret$daily.returns, ret$date))
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ret$daily.returns, ret$date)
## Dickey-Fuller = -14.308, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
log_close = ba %>%
mutate(log_close = log(close)) %>%
mutate(daysFromStart = 1:nrow(ba))
lc = log_close %>%
select(daysFromStart, close, log_close) %>%
gather(price_type, close, -daysFromStart)
ggplot(lc,
aes(daysFromStart, close)) +
geom_point() +
# Here comes the gganimate code
transition_states(
price_type,
transition_length = 2,
state_length = 1
) +
view_follow()
log_close = ba %>%
mutate(close = close / 100) %>% # divide by 100 for animation purposes
mutate(log_close = log(close),
log_close_diff = 10 * (log_close - lag(log_close))) %>% # multiply by 10 for animation purposes
mutate(daysFromStart = 1:nrow(ba))
lc = log_close %>%
select(daysFromStart, close, log_close, log_close_diff) %>%
gather(price_type, close, -daysFromStart) %>%
drop_na()
ggplot(lc,
aes(daysFromStart, close)) +
geom_point(col = 'darkgreen', alpha = 0.5) +
# Here comes the gganimate code
transition_states(
price_type,
transition_length = 2,
state_length = 1
)
ggplot(lc,
aes(close)) +
geom_density(fill = 'darkgreen', alpha = 0.25) +
# Here comes the gganimate code
transition_states(
price_type,
transition_length = 2,
state_length = 1
)
lc = ba %>%
mutate(close = close) %>% # divide by 100 for animation purposes
mutate(log_close = log(close),
log_close_diff = log_close - lag(log_close)) %>%
drop_na()
ggplot(lc, aes(x = log_close_diff)) + geom_histogram(bins = 40)